# Preview the first six rows of the raw MFL facility table.
head(malawi_MFL, n = 6L)
## # A tibble: 6 x 11
## CODE NAME `COMMON NAME` OWNERSHIP TYPE STATUS ZONE DISTRICT `DATE OPENED`
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 MC01… A + … A+A Private Clin… Funct… Cent… Mchinji Jan 1st 75
## 2 BT24… A-C … A.C Opticals Private Clin… Funct… Sout… Blantyre Jan 1st 75
## 3 MZ16… A-C … A-C Opticals Mission/… Clin… Non-f… Nort… Mzimba Jan 1st 75
## 4 BT24… Akwe… Akwezeke Pvt Private Clin… Funct… Sout… Blantyre Jan 1st 75
## 5 BT24… AB M… Abowa Private Clin… Funct… Sout… Blantyre Jan 1st 75
## 6 LL04… ABC … ABC Clinic Christia… Hosp… Funct… Cent… Lilongwe Jan 1st 75
## # … with 2 more variables: LATITUDE <chr>, LONGITUDE <chr>
# Preview the first six rows of the raw WHO facility table.
head(malawi_WHO, n = 6L)
## # A tibble: 6 x 10
## Country Admin1 `Facility name` `Facility type` Ownership Lat Long
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Malawi Centr… 80 Block Clinic Clinic MoH -12.9 33.4
## 2 Malawi Centr… ABC Community … Clinic FBO -14.0 33.7
## 3 Malawi Centr… Adventist Heal… Health Centre FBO -14.0 33.8
## 4 Malawi Centr… Alinafe Commun… Community Hosp… FBO -13.4 34.2
## 5 Malawi Centr… Area 18 Health… Health Centre MoH -13.9 33.8
## 6 Malawi Centr… Area 25 Health… Health Centre MoH -13.9 33.8
## # … with 3 more variables: `LL source` <chr>, iso3c <chr>,
## # facility_type_9 <chr>
# MFL row counts recorded by the author: 1546 before omitting NAs, 1427 after;
# 62 coordinates did not intersect with the admin data, leaving 1365.
# To see which facilities were omitted, anti-join the raw table against the
# facilities table; the latter is converted to a plain data frame first so it
# can be used in anti_join().
df_malawi_facilities_MFL <- as.data.frame(malawi_facilities_MFL)
# No `by` is supplied, so the join keys are whichever columns the two tables
# share. Recorded result: 119 facilities with missing coordinates in the MFL.
NA_MFL <- malawi_MFL %>%
  anti_join(df_malawi_facilities_MFL)
# Facilities with wrong coordinates (author's recorded count: 61).
# Intersect the facilities with malawi_admin1 to get the matching rows, then
# convert to a plain data frame so the result can be used in anti_join().
intersect_admin1_MFL <- malawi_facilities_MFL %>%
  st_intersection(malawi_admin1) %>%
  as.data.frame()
# Facilities that are absent from the matches are the ones that did not
# intersect.
no_intersect_admin1_MFL <- anti_join(malawi_facilities_MFL, intersect_admin1_MFL)
# Bring both tables to the same set of columns before stacking them.
# Missing-coordinate rows: drop columns 10 and 11 (presumably LATITUDE and
# LONGITUDE, going by the malawi_MFL preview) and rename columns 3 and 9 to
# their dotted spellings.
NA_MFL_1 <- NA_MFL[, -(10:11)]
names(NA_MFL_1)[c(3, 9)] <- c("COMMON.NAME", "DATE.OPENED")
# Non-intersecting rows: drop columns 10 and 11, then strip the geometry so a
# plain table remains.
no_intersect_admin1_MFL_1 <- st_set_geometry(
  no_intersect_admin1_MFL[, -(10:11)],
  NULL
)
# NA_MFL is overwritten here with the combined table of missing and wrong
# coordinates in the MFL.
NA_MFL <- rbind(NA_MFL_1, no_intersect_admin1_MFL_1)
# WHO row counts recorded by the author: 648 rows, 9 with missing coordinates,
# 639 remaining.
# Rows of malawi_WHO that have no match in new_malawi_WHO (presumably the
# table with missing coordinates removed - confirm against its definition).
NA_WHO <- malawi_WHO %>%
  anti_join(new_malawi_WHO)
# Privately owned MFL facilities (author's recorded figure: 433, 30.3%).
private_MFL <- malawi_facilities_MFL %>%
  filter(OWNERSHIP == "Private")
# Which facility types are private? Tabulate TYPE as a factor and keep the
# counts as a data frame (columns Var1 and Freq).
private_MFL$TYPE <- as.factor(private_MFL$TYPE)
types_private_MFL <- as.data.frame(
  table(private_MFL$TYPE)
)
types_private_MFL
## Var1 Freq
## 1 Clinic 356
## 2 Dispensary 45
## 3 Health Centre 9
## 4 Health Post 1
## 5 Hospital 16
## 6 Private 6
# MFL: attach a district to each facility by intersecting with malawi_admin2.
# Per the author's note, the 62 facilities that did not intersect are not
# included.
intersect_admin2_MFL <- malawi_facilities_MFL %>%
  st_intersection(malawi_admin2)
intersect_admin2_MFL$TYPE <- as.factor(intersect_admin2_MFL$TYPE)
intersect_admin2_MFL$shapeName <- as.factor(intersect_admin2_MFL$shapeName)
# Facility type by district, in long form: Var1 = type, Var2 = district,
# Freq = count (zero-count combinations are kept).
by_district_MFL <- as.data.frame(
  table(intersect_admin2_MFL$TYPE, intersect_admin2_MFL$shapeName)
)
head(by_district_MFL, n = 6L)
## Var1 Var2 Freq
## 1 Central Hospital Balaka 0
## 2 Clinic Balaka 10
## 3 Dispensary Balaka 1
## 4 District Hospital Balaka 1
## 5 Health Centre Balaka 11
## 6 Health Post Balaka 4
# WHO: attach a district to each facility by intersecting with malawi_admin2.
intersect_admin2_WHO <- sf_malawi_WHO %>%
  st_intersection(malawi_admin2)
intersect_admin2_WHO$Facility.type <- as.factor(intersect_admin2_WHO$Facility.type)
intersect_admin2_WHO$shapeName <- as.factor(intersect_admin2_WHO$shapeName)
# Facility type by district, in long form: Var1 = type, Var2 = district,
# Freq = count (zero-count combinations are kept).
by_district_WHO <- as.data.frame(
  table(intersect_admin2_WHO$Facility.type, intersect_admin2_WHO$shapeName)
)
head(by_district_WHO, n = 6L)
## Var1 Var2 Freq
## 1 Central Hospital Balaka 0
## 2 District Hospital Balaka 1
## 3 Mission Hospital Balaka 0
## 4 Rural Hospital Balaka 0
## 5 Community Hospital Balaka 0
## 6 Health Centre Balaka 13
# Example pie charts for a district: Balaka
# MFL - keep the facility types that occur at least once in Balaka, then drop
# the district column (column 2) so only type and count go to the chart.
balaka_MFL <- by_district_MFL %>%
  filter(Var2 == "Balaka", Freq > 0)
balaka_MFL <- balaka_MFL[, -2]
# MFL chart, drawn with sunburst()
balaka_plot_MFL <- sunburst(balaka_MFL, count = TRUE, legend = list(w = 120))
balaka_plot_MFL
# WHO - same preparation on the WHO counts.
balaka_WHO <- by_district_WHO %>%
  filter(Var2 == "Balaka", Freq > 0)
balaka_WHO <- balaka_WHO[, -2]
# WHO chart, drawn with sund2b()
balaka_plot_WHO <- sund2b(balaka_WHO, rootLabel = "Facilities")
balaka_plot_WHO
# Example pie charts for a district: Lilongwe
# MFL - keep the facility types that occur at least once in Lilongwe, then
# drop the district column (column 2) so only type and count go to the chart.
lilongwe_MFL <- by_district_MFL %>%
  filter(Var2 == "Lilongwe", Freq > 0)
lilongwe_MFL <- lilongwe_MFL[, -2]
# MFL chart
lilongwe_plot_MFL <- sund2b(lilongwe_MFL, rootLabel = "Facilities")
lilongwe_plot_MFL
# WHO - same preparation on the WHO counts.
lilongwe_WHO <- by_district_WHO %>%
  filter(Var2 == "Lilongwe", Freq > 0)
lilongwe_WHO <- lilongwe_WHO[, -2]
# WHO chart
lilongwe_plot_WHO <- sund2b(lilongwe_WHO, rootLabel = "Facilities")
lilongwe_plot_WHO
22 names in the MFL appear more than once. Of these, Mpepa Health Centre (to 2 decimal places), Liwonde Medical Clinic (identical), Ngatala Health Post (to 3 decimal places), Cape Maclear (identical) and LIFE STYLE PVT CLINIC (identical) have similar or identical coordinates.
Of these, Ngatala, Cape Maclear and LIFE STYLE PVT CLINIC are most likely duplicate entries, as their other attributes are also the same. The other facilities differ in type and so might not be duplicates.
10 names in the WHO data appear more than once; Mkango Health Centre has the same coordinates (to 4 decimal places) and its other attributes are also the same.
Duplicate coordinates: in the MFL, 44 coordinates are repeated, i.e. appear more than once.
Interestingly, 24 facilities in Blantyre reuse the same coordinates (34.3015278, -13.2512161); they consist of 1 hospital and a mix of clinics and health centres, and all have unique names.
The same coordinates (33.7895208991213, -14.0017665100358) have also been used for 4 facilities, all in different districts.
Some of these results arise because no coordinates were available and a placeholder such as (-1, 1) was entered instead.
No duplicate coordinates were found in the WHO data.
Duplicates between datasets?
# Rows whose facility name occurs more than once within a dataset. Every
# occurrence of a repeated name is kept, not only the later repeats.
# MFL
duplicate_name_MFL <- malawi_facilities_MFL[
  with(malawi_facilities_MFL, NAME %in% NAME[duplicated(NAME)]),
]
# WHO
duplicate_name_WHO <- new_malawi_WHO[
  with(
    new_malawi_WHO,
    `Facility name` %in% `Facility name`[duplicated(`Facility name`)]
  ),
]
# Facilities that share coordinates with another facility in the same dataset.
# MFL - keep every row whose geometry occurs more than once.
duplicate_coord_MFL <- malawi_facilities_MFL[
  with(malawi_facilities_MFL, geometry %in% geometry[duplicated(geometry)]),
]
# WHO - print the repeated geometries; the author recorded none.
with(sf_malawi_WHO, geometry[duplicated(geometry)])
## Geometry set for 0 features
## bbox: xmin: NA ymin: NA xmax: NA ymax: NA
## geographic CRS: WGS 84
# Blantyre
# MFL facilities in Blantyre that share coordinates with another facility.
blantyre_duplicate_coord_MFL <- duplicate_coord_MFL %>%
  filter(DISTRICT == "Blantyre")
# Facility types that occur at least once in Blantyre; the district column
# (column 2) is dropped so only type and count go to the chart.
blantyre_MFL <- by_district_MFL %>%
  filter(Var2 == "Blantyre", Freq > 0)
blantyre_MFL <- blantyre_MFL[, -2]
# Chart
blantyre_plot_MFL <- sund2b(blantyre_MFL, rootLabel = "Facilities")
blantyre_plot_MFL